* Session setup: start from an empty dataset, switch off -more- paging so the
* do-file runs unattended, and request 2g of memory (only meaningful on older
* Stata releases that still manage memory manually).
clear
set more off
set mem 2g

**Aaron Flaaen
**July 16, 2015
**Last Updated: August 30, 2017
**This file cleans up the DCA/UNI information on foreign ownership, and matches
** to the LFTTD data on wedges
**-------------------------------------------------------------------------



********************************************************************************
/* SUMMARY OF THIS FILE
FILES THAT MUST BE RUN FIRST: transfer_lbd_dca_combine.do 
INPUTS: MULT_LBD_`i'.dta.gz, wbdata.dta

**Step 1. Fix US Mult vs FOR Mult Issues (Within Year)
	1.1 Apply Manual Checks 
	1.2 Remove if firm is clearly non-mult ( there are no issues to check)
	1.3 Provided no disagreements, apply flags throughout firm (No use of JVs here)
	1.4 Use Uniworld Information for disagreements within DCA
	1.5 Use employment within year for remaining
		1.5.1: If one has at least 75% of employment share of matches, then re-classify
		1.5.2: If one has at least 50% and employment size of firm is small, then re-classify
		1.5.3: If there are any others, check manually
**Step 2. Manual Fixes (After reviewing manualchecks`i'.dta files)
	2.1 Put in Manual Fixes (run manualchecks.do)
	2.2 Apply Fixes and Append
**Step 3. Fix country_parent conflicts within year
	3.1 Check on whether US/For Mult disagreements have been solved
	3.2 Find disagreements in country_parent within for_mults
		**Duplicates 1: Those with NO foreign country parents
		**Duplicates 2: Those with only one foreign country parent
		**Duplicates 3: Those with multiple foreign country parents
		**Append Together
	3.3 Put in Manual Fixes (run manualchecks_formult.do)
**Step 4. Append Together, Prep for Smoothing Across Years
	4.1 Append Together
	4.2 Remove Never Mults
**Step 5. Rules for Smoothing Across Years
	5.1 Bring back in country_parentU variable
	5.2 Fill in One-Year Gaps in For-Mult or US-Mult Status
	5.3 Replace to nonmult one-year USMult or Formult Observations
	5.4 Smooth country_parent if country_parent only changes for one-year only (including US--> For --> US?)
	5.5 Fix Firms that we miss flagging in a non-Uniworld Year 
	5.6 Fix end years if we flag them as mults in all other years
**Step 6. Clean up and Save

OUTPUT: lbd_dca_`i'_clean.dta (one file per year, 1993-2011)		      */ 
********************************************************************************

* Work inside the project directory named by the global macro dir, which must
* be defined by the caller before this do-file is run. Every relative file
* name below is resolved against it.
cd ${dir}


**--------------------------------------------------------------------------
**Step 1. Fix US Mult vs FOR Mult Issues (Within Year)
**--------------------------------------------------------------------------


* Step 1 driver: for each year 1993-2011, reconcile US-multinational versus
* foreign-multinational flags within a firm.
*   Reads : MULT_LBD_`i'.dta.gz
*   Writes: nonmult_`i'.dta.gz   firms with no multinational flag at all
*           checked_MNs_`i'.dta  firms resolved by rules 1.3-1.4
*           multi_clean`i'.dta   firms resolved by the employment rules (1.5)
*           manualchecks`i'.dta  firms still in conflict, for manual review
* Variables ending in U carry the Uniworld flags; the others carry DCA flags.
* The order of the replace statements matters: each rule works on the flags
* left behind by the previous one.
forvalues i = 1993(1)2011 {


	!gunzip MULT_LBD_`i'.dta.gz
	
	use MULT_LBD_`i'.dta, clear
	
	* Unmatched establishments carry missing flags; treat them as not flagged
	replace flag_us_mult = 0 if flag_us_mult==.
	replace flag_for_mult = 0 if flag_for_mult==.
	replace flag_us_multU = 0 if flag_us_multU==.
	replace flag_for_multU = 0 if flag_for_multU==.
		
	
	**1.1 Apply Manual Checks 

	**US Mult Fixes
	
	* Firm-level max and min of each flag: max==1 means at least one
	* establishment of the firm is flagged
	bys firmid: egen maxflagusfirm = max(flag_us_mult)
	bys firmid: egen maxflagforfirm = max(flag_for_mult)
	bys firmid: egen maxflagusUfirm = max(flag_us_multU)
	bys firmid: egen maxflagforUfirm = max(flag_for_multU)
	
	bys firmid: egen minflagusfirm = min(flag_us_mult)
	bys firmid: egen minflagforfirm = min(flag_for_mult)
	bys firmid: egen minflagusUfirm = min(flag_us_multU)
	bys firmid: egen minflagforUfirm = min(flag_for_multU)
	
	
	**1.2 Remove if firm is clearly non-mult ( there are no issues to check)
	gen nonmult = 0
	replace nonmult = 1 if maxflagforfirm==0 & maxflagusfirm==0 & maxflagforUfirm~=1 & maxflagusUfirm~=1
	
	preserve
	keep if nonmult ==1
	save nonmult_`i'.dta, replace
	* -f: overwrite the archive left by an earlier run. Without it gzip
	* refuses to replace nonmult_`i'.dta.gz (or waits on a prompt), so a
	* re-run would keep the stale archive next to the fresh .dta.
	!gzip -f nonmult_`i'.dta
	restore
	drop if nonmult==1
	
	
	**1.3 Provided no disagreements, apply flags throughout firm
	
	**Check if there are ANY JVs in the firm
	gen jv = 0
	replace jv = 1 if company_type=="Joint Venture"
	bys firmid: egen anyjvs = max(jv)
	
	**!! DOING THIS REGARDLESS OF JVS RIGHT NOW (BEFORE CONDITIONING WITH ANYJVS=0)
	**If there is never a formult match, but there is at least one usmult match, replace all with usmult
	replace flag_us_mult = maxflagusfirm if maxflagforfirm==0 & maxflagforUfirm~=1 & maxflagusfirm==1 
	replace country_parent = "United States" if maxflagforfirm==0 & maxflagforUfirm~=1 & maxflagusfirm==1 
	**combine with uniworld information
	replace flag_us_mult = maxflagusUfirm if maxflagforUfirm!=1 & maxflagforfirm==0 & maxflagusUfirm==1 
	replace country_parent = "United States" if maxflagforUfirm!=1 & maxflagforfirm==0 & maxflagusUfirm==1 
	replace flag_us_multU = maxflagusUfirm if maxflagforUfirm!=1 & maxflagforfirm==0 & maxflagusUfirm==1 
	
	
	**If there is never a usmult match, but there is at least one formult match, replace all with formult
	replace flag_for_mult = maxflagforfirm if maxflagforfirm==1 & maxflagusUfirm~=1 & maxflagusfirm==0 
	replace country_parent = "" if country_parent=="United States" & maxflagforfirm==1 & maxflagusUfirm~=1 & maxflagusfirm==0 
	**combine with uniworld information
	replace flag_for_mult = maxflagforUfirm if maxflagforUfirm==1 & maxflagusfirm==0 & maxflagusUfirm!=1 
	replace country_parent = "" if country_parent=="United States" & maxflagforUfirm==1 & maxflagusfirm==0 & maxflagusUfirm!=1 
	replace flag_for_multU = maxflagforUfirm if maxflagforUfirm==1 & maxflagusfirm==0 & maxflagusUfirm!=1 
	
	
	* Refresh the firm-level maxima after the flags were spread in 1.3
	drop maxflagusfirm maxflagforfirm maxflagusUfirm maxflagforUfirm
	bys firmid: egen maxflagusfirm = max(flag_us_mult)
	bys firmid: egen maxflagforfirm = max(flag_for_mult)
	bys firmid: egen maxflagusUfirm = max(flag_us_multU)
	bys firmid: egen maxflagforUfirm = max(flag_for_multU)
	
	
	**1.4 Use Uniworld Information for disagreements within DCA
	
	**If DCA disagrees (us-mult and for-mult) then check Uniworld
	replace flag_us_mult=1 if maxflagusfirm==1 & maxflagforfirm==1 & maxflagusUfirm==1
	replace country_parent="United States" if maxflagusfirm==1 & maxflagforfirm==1 & maxflagusUfirm==1
	replace flag_for_mult=0 if maxflagusfirm==1 & maxflagforfirm==1 & maxflagusUfirm==1
	
	* These run second, so a firm flagged both US and foreign in Uniworld
	* ends up foreign
	replace flag_us_mult=0 if maxflagusfirm==1 & maxflagforfirm==1 & maxflagforUfirm==1
	replace flag_for_mult=1 if maxflagusfirm==1 & maxflagforfirm==1 & maxflagforUfirm==1
	replace country_parent = "" if country_parent=="United States" & maxflagusfirm==1 & maxflagforfirm==1 & maxflagforUfirm==1
	
	**If DCA and Uniworld disagree, then choose DCA
	replace flag_for_mult=1 if maxflagforfirm==1 & maxflagusfirm==0 & maxflagusUfirm==1
	replace country_parent = "" if country_parent=="United States" & maxflagforfirm==1 & maxflagusfirm==0 & maxflagusUfirm==1
	replace flag_us_mult=0 if maxflagforfirm==1 & maxflagusfirm==0 & maxflagusUfirm==1
	
	replace flag_us_mult=1 if maxflagusfirm==1 & maxflagforfirm==0 & maxflagforUfirm==1
	replace country_parent = "United States" if maxflagusfirm==1 & maxflagforfirm==0 & maxflagforUfirm==1
	replace flag_for_mult=0 if maxflagusfirm==1 & maxflagforfirm==0 & maxflagforUfirm==1
	
	* Refresh the firm-level maxima after 1.4
	drop maxflagusfirm maxflagforfirm maxflagusUfirm maxflagforUfirm
	bys firmid: egen maxflagusfirm = max(flag_us_mult)
	bys firmid: egen maxflagforfirm = max(flag_for_mult)
	bys firmid: egen maxflagusUfirm = max(flag_us_multU)
	bys firmid: egen maxflagforUfirm = max(flag_for_multU)
	
	**Keep those that we've fixed with this step
	preserve
	keep if (maxflagusfirm==0 & maxflagforfirm==0) | (maxflagusfirm==1 & maxflagforfirm==0) | (maxflagusfirm==0 & maxflagforfirm==1)
	save checked_MNs_`i'.dta, replace
	restore
	* What remains are firms still flagged both US and foreign in DCA
	keep if (maxflagusfirm==1 & maxflagforfirm==1) 
	
	
	**Step 1.5 Use employment within year for remaining

	
	**Only use information from matches corresponding to DCA information
	replace flag_for_mult=. if companynumber==""
	replace flag_us_mult=. if companynumber==""
	
	**Create firm-match employment (only corresponding to matched obsv)
	* cemp = firm employment minus the employment of unmatched establishments
	bys firmid: egen sumemp = sum(emp)
	gen missemp = emp if flag_us_mult==. & flag_for_mult==.
	replace missemp  = 0 if missemp==.
	bys firmid: egen missempus = sum(missemp)
	gen cemp = sumemp-missempus
	
	
	bys firmid flag_for_mult: egen femp = sum(emp)
	bys firmid flag_us_mult: egen uemp = sum(emp)
	
	* Shares of matched employment sitting in foreign-flagged and in
	* US-flagged establishments, spread to every row of the firm
	gen fshare = femp/cemp if flag_for_mult==1
	replace fshare = 0 if fshare ==.
	gen ushare = uemp/cemp if flag_us_mult==1
	replace ushare = 0 if ushare ==.
		
	bys firmid: egen forshare = max(fshare)
	bys firmid: egen usshare = max(ushare)
	
	drop ushare fshare
	
	**1.5.1: If one has at least 75% of employment share of matches, then re-classify
	* companynumber is blanked on the establishments whose match lost
	replace flag_us_mult = 1 if usshare>0.75 
	replace companynumber="" if flag_for_mult == 1 & usshare>0.75 
	replace country_parent = "United States" if flag_for_mult == 1 & usshare>0.75 
	replace flag_for_mult = 0 if usshare>0.75 
	
	replace flag_for_mult = 1 if forshare>0.75
	replace country_parent = "" if country_parent=="United States" & forshare>0.75
	replace companynumber="" if flag_us_mult == 1 & forshare>0.75 
	replace flag_us_mult = 0 if forshare>0.75 
	
	
	**1.5.2: If one has at least 50% and employment size of firm is small, then re-classify
	replace flag_us_mult = 1 if usshare>0.50 & sumemp<10000
	replace companynumber="" if flag_for_mult == 1 & usshare>0.50 & sumemp<10000
	replace country_parent = "United States" if flag_for_mult == 1 & usshare>0.50 & sumemp<10000 
	replace flag_for_mult = 0 if usshare>0.50 & sumemp<10000 
	
	replace flag_for_mult = 1 if forshare>0.50 & sumemp<10000
	replace country_parent = "" if country_parent=="United States" & forshare>0.50 & sumemp<10000
	replace companynumber="" if flag_us_mult == 1 & forshare>0.50 & sumemp<10000
	replace flag_us_mult = 0 if forshare>0.50 & sumemp<10000
	
	**1.5.3: If there are any others, check manually
	* A firm still needs review when neither flag is constant across its rows
	* (group sizes by flag differ from the firm size)
	capture drop dup
	bys firmid: gen dup = _N
	bys firmid flag_us_mult: gen dup2 = _N
	bys firmid flag_for_mult: gen dup3 = _N
	
	gen exam = 0
	replace exam = 1 if (dup~=dup2) & (dup~=dup3)
	
	save empchecks_temp`i'.dta, replace
	
	keep if exam==1
	save manualchecks`i'.dta, replace
	
	use empchecks_temp`i'.dta, clear
	drop if exam==1
	save multi_clean`i'.dta, replace
	erase empchecks_temp`i'.dta
	
	* Put the input back in its compressed form
	!gzip MULT_LBD_`i'.dta

}

**--------------------------------------------------------------------------
**Step 2. Manual Fixes (After reviewing manualchecks`i'.dta files)
**--------------------------------------------------------------------------	

**Step 2.1 Put in Manual Fixes
**Check that there are no new checks to do in post 2005 data
* Step 2.1: run the hand-coded corrections for the firms that Step 1 left
* in the manualchecks files.
* Check that there are no new checks to do in post 2005 data
do manualchecks.do


* Step 2.2: per year, stack the three groups of multinational firms
* (resolved by rule, resolved by employment share, resolved by hand), drop
* the employment-share helper variables, and store the result compressed as
* MN_firms_<year>.dta.gz. The three intermediate files are deleted.
forvalues yr = 1993/2011 {

	use checked_MNs_`yr'.dta, clear
	append using multi_clean`yr'.dta
	append using manualchecks`yr'.dta

	drop femp uemp usshare forshare cemp sumemp missemp missempus dup*

	* Flags blanked for unmatched establishments in Step 1.5 go back to zero
	foreach flagvar of varlist flag_us_mult flag_for_mult {
		replace `flagvar' = 0 if `flagvar'==.
	}

	save MN_firms_`yr'.dta, replace
	!gzip MN_firms_`yr'.dta

	foreach stub in multi_clean`yr' manualchecks`yr' checked_MNs_`yr' {
		erase `stub'.dta
	}
}


	
**--------------------------------------------------------------------------
**Step 3. Fix country_parent conflicts within year
**--------------------------------------------------------------------------	

* Step 3 driver: for each year, collapse MN_firms_`i'.dta to one row per firm
* with a single country_parent.
*   Reads : MN_firms_`i'.dta.gz (left uncompressed afterwards)
*   Writes: mult_corrected_`i'.dta   firms resolved automatically
*           exammultothers`i'.dta    one-foreign-parent firms for manual review
*           exammultforeign`i'.dta   multi-foreign-parent firms for manual review
* Scratch files multothers.dta and multforeign.dta are overwritten each year.
* NOTE(review): multothers_corrected.dta is collapsed without flag_for_mult,
* so those rows reach Step 4 with a missing flag. Later code (the
* flag_for_mult==. rules in Steps 4.2 and 5) relies on that, so it is kept.
forvalues i = 1993(1)2011 {
	di `i'
	**Step 3.1 Check on whether US/For Mult disagreements have been solved
	!gunzip MN_firms_`i'.dta.gz
	use MN_firms_`i'.dta, clear
	
	* Establishments flagged foreign only by Uniworld take the Uniworld parent
	replace country_parent = country_parentU if flag_for_multU==1 & flag_for_mult==0 & flag_us_mult==0
	replace flag_for_mult = 1 if flag_for_multU==1 & flag_for_mult==0 & flag_us_mult==0
	

	drop maxflagusfirm maxflagforfirm maxflagusUfirm maxflagforUfirm
	bys firmid: egen maxflagusfirm = max(flag_us_mult)
	bys firmid: egen maxflagforfirm = max(flag_for_mult)
	bys firmid: egen maxflagusUfirm = max(flag_us_multU)
	bys firmid: egen maxflagforUfirm = max(flag_for_multU)
	
	replace country_parent = "United States" if maxflagusfirm==1 & maxflagforfirm==0 & maxflagforUfirm==0 & flag_for_mult==0
	replace country_parent = country_parentU if country_parent=="" & flag_for_mult==1
	replace country_parent = "United States" if flag_for_mult==0 & country_parent==""
	*replace country_parent = "unknown" if country_parent=="" & flag_for_mult==1
	
	
	* Stop if any firm is still flagged both US and foreign, or if a
	* foreign-flagged row names the United States as parent
	assert (maxflagusfirm==0 & maxflagforfirm==0) | (maxflagusfirm==1 & maxflagforfirm==0) | (maxflagusfirm==0 & maxflagforfirm==1)
	assert country_parent~="United States" if flag_for_mult==1

	**Bring through country_parent if no disagreements and country_parent==""
	
	
	**Step 3.2 Find disagreements in country_parent within for_mults
	collapse (sum) emp pay (first) companynumber, by(country_parent firmid flag_us_mult flag_for_mult)
	duplicates tag firmid, gen(dup)

	tab dup
	* Firms with a single row have nothing to reconcile
	preserve
	keep if dup==0
	drop dup
	save temp_MULT_nodup.dta, replace
	restore
	drop if dup==0
	
	*Check to see whether there will be any where dup>0 & numflag_f==0 
	
	* cemp = firm employment excluding rows with an unknown parent;
	* numflag_f = number of rows naming a foreign parent
	bys firmid: egen sumemp = sum(emp)
	gen missemp = emp if country_parent==""
	bys firmid: egen missempus = total(missemp)
	replace missempus = 0 if missempus==.
	gen cemp = sumemp-missempus
	gen flag_f = 0
	replace flag_f = 1 if country_parent~="United States" & country_parent~=""
	bys firmid: egen numflag_f = sum(flag_f)
	preserve
	
	**Duplicates 1: Those with NO foreign country parents
	**----------------------------------------------------------------------
	keep if numflag_f==0
	replace country_parent = "United States"
	bys firmid: egen maxflagus = max(flag_us_mult)
	replace flag_us_mult = maxflagus
	assert flag_for_mult==0
	* capture: collapse fails when this group is empty, in which case the
	* old dup variable survives and the second capture absorbs the clash
	capture collapse (sum) emp pay, by(country_parent firmid flag_us_mult flag_for_mult)
	capture duplicates tag firmid, gen(dup)
	assert dup==0
	capture drop dup
	save easyusdups.dta, replace	


	restore
	preserve
	
	
	**Duplicates 2: Those with only one foreign country parent
	**----------------------------------------------------------------------
	keep if numflag_f==1
	gen foreigncshare = emp/cemp if flag_f==1
	replace foreigncshare = 0 if foreigncshare ==.
	gen uscshare = emp/cemp if country_parent=="United States"
	replace uscshare = 0 if uscshare==.
	bys firmid: egen forshare = max(foreigncshare) if numflag_f==1
	bys firmid: egen usshare = sum(uscshare)
	* Same 75 percent, and 50 percent for small firms, rules as Step 1.5.
	* The foreign name is written only on the foreign row; the US name on
	* every row of the firm.
	gen newcountry = ""
	replace newcountry = country_parent if (forshare==foreigncshare) & forshare>0.75
	replace newcountry = "United States" if usshare>0.75
	replace newcountry = country_parent if (forshare==foreigncshare) & forshare>0.50 & sumemp<10000
	replace newcountry = "United States"  if usshare>0.50 & sumemp<10000
	replace newcountry = country_parent if (forshare==foreigncshare) & forshare==0.50 & sumemp<10000
	**A few cases where picked up country_parent with zero emp.
	replace newcountry = country_parent if forshare==0 & usshare==0
	replace newcountry = "United States" if sumemp==0
	gen examflag = 0
	replace examflag = 1 if (forshare>0.25 & forshare<0.75 & sumemp>10000)
	replace examflag = 1 if (usshare>0.25 & usshare<0.75 & sumemp>10000)

	* Firms on a threshold boundary (sumemp exactly 10000, or a share of
	* exactly 0.25 or 0.75 with sumemp above 10000) match none of the rules
	* above: no newcountry and no examflag. They used to vanish at the
	* drop if _m==2 below. Send them to manual review instead.
	gen byte resolved = (newcountry != "")
	bys firmid: egen byte anyresolved = max(resolved)
	replace examflag = 1 if anyresolved==0
	drop resolved anyresolved

	sort firmid
	save multothers.dta, replace
	keep if examflag==1
	save exammultothers`i'.dta, replace
	use multothers.dta, clear
	drop if examflag==1
	* One row per firm holding the chosen parent, merged back on all rows
	keep newcountry firmid
	drop if newcountry==""
	duplicates drop
	sort firmid
	merge 1:m firmid using multothers.dta
	**tab _m==2 for examflag==1
	drop if _m==2
	drop _m
	replace country_parent=newcountry
	drop newcountry
	
	bys firmid: egen maxflag_us = max(flag_us_mult)
	replace flag_us_mult = maxflag_us if country_parent=="United States"
	replace flag_us_mult = 0 if country_parent~="United States"
	
	* flag_for_mult is not carried through this collapse (see header note)
	collapse (sum) emp pay, by(country_parent firmid flag_us_mult)
	duplicates tag firmid, gen(dupcheck)
	assert dupcheck==0
	drop dupcheck
	save multothers_corrected.dta, replace
	
	
	restore
	
	
	**Duplicates 3: Those with multiple foreign country parents
	**----------------------------------------------------------------------
	keep if numflag_f>1
	**Calculate employment share
	gen foreigncshare = emp/cemp if country_parent~=""
	replace foreigncshare = 0 if foreigncshare == .
	bys firmid: egen maxfor = max(foreigncshare)
	* NOTE(review): two different parents tied at the maximum share would
	* give two newcountry values per firm and make the 1:m merge below fail
	gen newcountry = country_parent if foreigncshare ==maxfor
	gen examflag = 0
	**Flag for manual review if one country_parent does not occupy >75% of total identified
	replace examflag = 1 if maxfor<0.75 & sumemp>10000
	replace examflag = 1 if maxfor==0.5
	replace examflag = 1 if maxfor==0
	sort firmid
	save multforeign.dta, replace
	keep if examflag==1
	save exammultforeign`i'.dta, replace
	
	use multforeign.dta, clear
	drop if examflag==1
	keep newcountry firmid
	drop if newcountry==""
	duplicates drop
	sort firmid
	merge 1:m firmid using multforeign.dta
	**tab _m==2 for examflag==1
	drop if _m==2
	drop _m
	replace country_parent=newcountry
	drop newcountry
	
	bys firmid: egen maxflag_us = max(flag_us_mult)
	replace flag_us_mult = maxflag_us if country_parent=="United States"
	replace flag_us_mult = 0 if country_parent~="United States"
	
	replace flag_for_mult = 1 if country_parent~="United States"
	replace flag_for_mult = 0 if country_parent=="United States"
	
	collapse (sum) emp pay, by(country_parent firmid flag_us_mult flag_for_mult)
	duplicates tag firmid, gen(dupcheck)
	assert dupcheck==0
	drop dupcheck
	
	**Append Together
	append using multothers_corrected.dta
	append using easyusdups.dta
	append using temp_MULT_nodup.dta

	
	save mult_corrected_`i'.dta, replace
	erase multothers_corrected.dta
	erase easyusdups.dta
	erase temp_MULT_nodup.dta
	*erase use MN_firms_`i'.dta

}




**Check to see if there are ADDITIONAL MANUAL FIXES!!!!!!!!!!





**----------------------------
**Step 3.3 Put in Manual Fixes
* Step 3.3: run the hand-coded country_parent corrections. The loop below
* expects a manualchecks<year>_formult.dta file for every year afterwards.
do manualchecks_formult.do
**----------------------------

**--------------------------------------------------------------------------
**Step 4. Append Together, Prep for Smoothing Across Years
**--------------------------------------------------------------------------	

* Step 4.1: build the firm-year panel MN_presmoothing.dta. Each year stacks
* the non-multinationals, the automatically corrected multinationals and the
* manually corrected ones, collapses to one row per firm, and swaps the
* parent country name for its ifscode from wbdata.dta. Names not found in
* wbdata.dta get ifscode 0. Each later year is placed ahead of the panel
* accumulated so far.
forvalues yr = 1993/2011 {

	!gunzip nonmult_`yr'.dta.gz
	use nonmult_`yr'.dta, clear
	append using mult_corrected_`yr'.dta
	append using manualchecks`yr'_formult.dta

	* Rows without a parent are treated as US-owned
	replace country_parent="United States" if country_parent==""
	collapse (sum) emp pay, by(firmid flag_us_mult flag_for_mult country_parent)

	* Every firm must now be a single row
	bys firmid: gen dup = _N
	assert dup==1
	drop dup
	gen year = `yr'

	* Country name to ifscode
	rename country_parent country
	merge m:1 country using wbdata.dta, keepusing(country ifscode)
	drop if _merge==2
	replace ifscode=0 if _merge==1
	drop _merge country

	* The first year starts the panel; later years are stacked on top of it
	if `yr' > 1993 {
		append using MN_presmoothing.dta
		compress
	}
	save MN_presmoothing.dta, replace
	!gzip nonmult_`yr'.dta
}

**Step 4.2 Remove Never Mults
* Step 4.2: split the firm-year panel into firms that are never flagged as a
* multinational (nevermult.dta.gz, set aside until Step 6) and firms flagged
* in at least one year (for_smoothing_temp.dta, input to Step 5).
use MN_presmoothing.dta, clear	
* ifscode 111 is the code this script uses for a US parent
replace ifscode = 111 if ifscode==.
* Rows that arrive without a foreign flag but with a non-US parent are foreign
replace flag_for_mult=1 if flag_for_mult==. & ifscode~=111

**Before doing anything, remove firms that are never mults
bys firmid: egen maxflagusmult = max(flag_us_mult)
bys firmid: egen maxflagformult = max(flag_for_mult)

* The two groups are defined as exact complements. The earlier pair of
* conditions (both maxima equal to 0, versus either equal to 1) matched
* neither group when a firm-level maximum was missing, so a firm that is
* never a US multinational and whose flag_for_mult is missing in every year
* was dropped from the output altogether. Such firms are now kept with the
* never-multinationals, like firms whose flag is missing in only some years.
gen byte evermult = (maxflagusmult==1 | maxflagformult==1)
drop maxflagusmult maxflagformult

preserve
keep if evermult==0
drop evermult
save nevermult.dta, replace
!gzip nevermult.dta
restore

keep if evermult==1
drop evermult
save for_smoothing_temp.dta, replace
*erase MN_presmoothing.dta


**--------------------------------------------------------------------------
**Step 5. Rules for Smoothing Across Years
**--------------------------------------------------------------------------	


**Step 5.1 Bring back in country_parentU variable
* Step 5.1a: for each year in the list below (the years in which the
* Uniworld foreign-parent flag is used), build temp_uni_parent`num'.dta with
* one row per firm and the ifscode of each distinct Uniworld parent in
* ifscode1, ifscode2, and so on.
foreach num of numlist 1995 1998 2000 2002 2004 2006 2008 2011  {
	!gunzip MULT_LBD_`num'.dta.gz
	use MULT_LBD_`num'.dta, clear
	keep if flag_for_multU==1
	keep firmid country_parentU

	* Country name to ifscode; names missing from wbdata.dta get code 0
	rename country_parentU country
	merge m:1 country using wbdata.dta, keepusing(country ifscode)
	drop if _m==2
	replace ifscode=0 if _m==1
	drop _m
	drop country 
	
	**There can be doubles in Uniworld as well.
	duplicates drop
	bys firmid: gen dups = _n
	tab dups
	reshape wide ifscode, i(firmid) j(dups)

	* reshape creates ifscode2 only when some firm has two parents in this
	* year, but later code names ifscode2 explicitly (keepusing in the merge,
	* the fill rule, the final drop). Create it empty when it is absent so a
	* year without double parents does not stop the run.
	capture confirm variable ifscode2
	if _rc {
		local codetype : type ifscode1
		gen `codetype' ifscode2 = .
	}
	
	save temp_uni_parent`num'.dta, replace
	!gzip MULT_LBD_`num'.dta
}
	
* Step 5.1b: for the years that have a temp_uni_parent file, take that year's
* slice of the smoothing panel, attach the Uniworld parent codes, and let a
* single Uniworld parent replace a US code (111) on a foreign-flagged row.
* Each slice is saved as for_smoothing_temp_<year>.dta.
foreach uniyear of numlist 1995 1998 2000 2002 2004 2006 2008 2011 {

	display `uniyear'
	use for_smoothing_temp.dta, clear
	keep if year==`uniyear'

	* Any non-US parent code implies a foreign multinational
	replace flag_for_mult=1 if ifscode!=111

	* Keep every panel row; discard Uniworld firms that are not in the panel
	merge 1:m firmid using temp_uni_parent`uniyear'.dta, keepusing(firmid ifscode1 ifscode2) keep(master match) nogenerate

	* Applied only when Uniworld names exactly one parent (ifscode2 is missing)
	replace ifscode = ifscode1 if flag_for_mult==1 & ifscode==111 & ifscode2==. & ifscode1!=. & ifscode1!=111

	* Foreign-flagged rows still coded US that have two Uniworld parents
	tab year if flag_for_mult==1 & ifscode==111 & ifscode2!=.
	*drop ifscode1 ifscode2
	save for_smoothing_temp_`uniyear'.dta, replace
}

* Step 5.1c: the remaining years get no Uniworld merge. Their slice of the
* smoothing panel is saved as for_smoothing_temp_<year>.dta after the same
* rule that a non-US parent code implies a foreign multinational.
foreach plainyear of numlist 1993 1994 1996 1997 1999 2001 2003 2005 2007 2009 2010 {

	use for_smoothing_temp.dta, clear
	keep if year==`plainyear'
	replace flag_for_mult=1 if ifscode!=111
	save for_smoothing_temp_`plainyear'.dta, replace
}
	
* Step 5.1d: stack the yearly slices back into for_smoothing_temp.dta and
* delete the slices.
* clear makes the load independent of whatever is in memory; without it
* -use- stops whenever the data in memory have unsaved changes.
use for_smoothing_temp_1993.dta, clear
forvalues i = 1994(1)2011 {
	append using for_smoothing_temp_`i'.dta
}
* One save after the loop writes the same final file as saving the growing
* panel on every iteration
save for_smoothing_temp.dta, replace

forvalues i = 1993(1)2011 {
	erase for_smoothing_temp_`i'.dta
}

	

	


* Load the reassembled panel and declare it a yearly firm panel so that the
* lag and lead operators used by the smoothing rules are available.
use for_smoothing_temp.dta, clear

* Numeric panel identifier built from firmid
egen id = group(firmid)

* Rows whose foreign flag is still missing: take the Uniworld parent code
* when one is attached, then set the flag from the resulting parent code
replace ifscode=ifscode1 if flag_for_mult==. & ifscode1!=.
replace flag_for_mult=1 if flag_for_mult==. & ifscode!=111
replace flag_for_mult=0 if flag_for_mult==.

**Rules for smoothing across years
tsset id year, yearly

*replace ifscode = ifscode1 if ifscode==111 & ifscode1~=111 & ifscode1~=. & flag_for_mult==1 & ifscode2==.


**Step 5.2 Fill in One-Year Gaps in For-Mult or US-Mult Status
* Step 5.2: fill one-year gaps. A year whose previous and next year are both
* foreign multinationals (or both US multinationals) takes the status of its
* neighbours. Lag and lead values are taken as they stand before each
* replace runs, so a fill never cascades within one statement.

* Foreign on both sides: flag the middle year foreign and clear a US flag
replace flag_for_mult = 1 if f.flag_for_mult==1 & l.flag_for_mult==1 & flag_for_mult==0
replace flag_us_mult = 0 if f.flag_for_mult==1 & l.flag_for_mult==1 & flag_us_mult==1

* Same foreign parent on both sides and a US code in the middle: copy it
replace ifscode = f.ifscode if f.ifscode~=111 & l.ifscode~=111 & f.ifscode==l.ifscode & ifscode==111 & f.ifscode~=.

* US on both sides: flag the middle year US and clear a foreign flag
replace flag_us_mult = 1 if f.flag_us_mult==1 & l.flag_us_mult==1 & flag_us_mult==0
replace flag_for_mult = 0 if f.flag_us_mult==1 & l.flag_us_mult==1 & flag_for_mult==1

* US code (111) on both sides and a foreign code in the middle: reset to 111.
* This rule used to require flag_us_mult==0, but the replace two lines up
* has already set the flag to 1 on exactly these rows, so the rule never
* fired and a filled US year kept its foreign ifscode. It now tests the
* flag as it stands at this point.
replace ifscode = f.ifscode if f.ifscode==111 & l.ifscode==111 & f.ifscode==l.ifscode & ifscode~=111 & f.ifscode~=. & f.flag_us_mult==1 & l.flag_us_mult==1 & flag_us_mult==1


**Step 5.3 Replace to nonmult one-year USMult or Formult Observations
* Step 5.3: a firm flagged as a multinational (US or foreign) in exactly one
* year of the whole panel is reset to non-multinational, and a foreign parent
* code is reset to 111.
bys firmid: egen numflags = total(flag_us_mult + flag_for_mult)
gen byte oneyearonly = (numflags==1)

replace flag_us_mult = 0 if oneyearonly & flag_us_mult==1
* The parent code is reset before the flag it is conditioned on
replace ifscode = 111 if oneyearonly & flag_for_mult==1
replace flag_for_mult = 0 if oneyearonly & flag_for_mult==1

drop numflags oneyearonly


**Step 5.4 Smooth country_parent if country_parent only changes for one-year only (including US--> For --> US?)
* Step 5.4: when the previous and the next year are both foreign
* multinationals with the same non-US parent code, a different code in the
* middle year is replaced by the neighbours' code.
* The lag and lead operators need the panel back in id-year order.
sort id year
replace ifscode = f.ifscode if l.flag_for_mult==1 & f.flag_for_mult==1 & l.ifscode!=111 & f.ifscode!=111 & l.ifscode==f.ifscode & ifscode!=f.ifscode


**Step 5.5 Fix Firms that we miss flagging in a non-Uniworld Year 
* Step 5.5: fill two-year gaps. A firm unflagged in two consecutive years
* but flagged in the year before and the year after the pair is flagged in
* both gap years. minyear and maxyear are kept for later steps.
bys firmid: egen minyear = min(year)
bys firmid: egen maxyear = max(year)
sort id year

* Foreign multinationals: gap pairs 1996-1997 and 2009-2010.
* gap_uni is 1 for the first year of a pair and 2 for the second.
gen byte gap_uni = 0
replace gap_uni = 1 if l.flag_for_mult==1 & f2.flag_for_mult==1 & flag_for_mult==0 & flag_us_mult==0 & (year==1996 | year==2009)
replace gap_uni = 2 if l2.flag_for_mult==1 & f.flag_for_mult==1 & flag_for_mult==0 & flag_us_mult==0 & (year==1997 | year==2010)
replace flag_for_mult=1 if gap_uni>0 & year!=minyear & year!=maxyear
replace flag_us_mult=0 if gap_uni>0 & year!=minyear & year!=maxyear
* Both gap years take the parent code of the flagged year before the pair.
* The second year must reach back two years: its one-year lag is the first
* gap year, whose code at the time the expression is evaluated is still the
* old one (or missing when that year has no row), because time-series
* operators do not pick up values changed by the same or an earlier replace
* in a cascading way.
replace ifscode = l.ifscode if gap_uni==1 & year!=minyear & year!=maxyear
replace ifscode = l2.ifscode if gap_uni==2 & year!=minyear & year!=maxyear
drop gap_uni
	
* US multinationals: same rule with the year pairs 1994/1997 and 1995/1998.
* NOTE(review): these years differ from the foreign pairs above; confirm
* that they match the years without US Uniworld coverage.
gen gap_uni=1 if l.flag_us_mult==1 & f2.flag_us_mult==1 & flag_for_mult==0 & flag_us_mult==0 & (year==1994 | year==1997)
replace gap_uni=1 if l2.flag_us_mult==1 & f.flag_us_mult==1 & flag_for_mult==0 & flag_us_mult==0 & (year==1995 | year==1998)
replace flag_us_mult=1 if gap_uni==1 & year!=minyear & year!=maxyear
replace flag_for_mult=0 if gap_uni==1 & year!=minyear & year!=maxyear
drop gap_uni
	

**Step 5.6 Fix end years if we flag them as mults in all other years
* Step 5.6: a firm flagged US (or foreign) in all of its years but one gets
* the flag in the remaining year as well. numpossible is kept for Step 6.
bys firmid: gen numpossible = _N
bys firmid: egen sumflagus= sum(flag_us_mult)
by firmid: egen sumflagfor= sum(flag_for_mult)
* diff_* = number of the firm's years without the flag
gen diff_us= numpossible - sumflagus
gen diff_for=numpossible - sumflagfor

* Each rule also requires at least one flagged year. Without that, a firm
* with a single row and no flags has diff_us==1 and diff_for==1, so the four
* rules below turned every such firm (including those reset to
* non-multinational in Step 5.3) into a foreign multinational that still
* carries the US parent code.
* NOTE(review): a two-row firm that is US in one year and foreign in the
* other still matches both rules and ends up foreign in both years.
replace flag_us_mult=1 if flag_us_mult==0 & diff_us==1 & sumflagus>0
replace flag_for_mult=0 if diff_us==1 & sumflagus>0
replace flag_for_mult=1 if flag_for_mult==0 & diff_for==1 & sumflagfor>0

replace flag_us_mult=0 if diff_for==1 & sumflagfor>0
drop diff* sumflagus sumflagfor

* A foreign-flagged 2011 or 1993 row that still has the US code takes the
* foreign parent code of the adjacent foreign-flagged year
sort id year
replace ifscode = l.ifscode if flag_for_mult==1 & ifscode==111 & l.ifscode~=111 & l.flag_for_mult==1 & year==2011
replace ifscode = f.ifscode if flag_for_mult==1 & ifscode==111 & f.ifscode~=111 & f.flag_for_mult==1 & year==1993

**--------------------------------------------------------------------------
**Step 6. Clean up and Save
**--------------------------------------------------------------------------	

**Hopefully this checks out
* Step 6: check the smoothed panel, add back the never-multinational firms,
* write one output file per year (lbd_dca_<year>_clean.dta), and remove the
* intermediate files.

**Hopefully this checks out
* No firm-year may be both a US and a foreign multinational
gen test = flag_us_mult+flag_for_mult
assert test<2

drop numpossible maxyear test minyear id ifscode1 ifscode2
!gunzip nevermult.dta.gz

append using nevermult.dta
tab year 

forvalues i = 1993(1)2011 {
	preserve
	keep if year==`i'
	save lbd_dca_`i'_clean.dta, replace
	restore
}


erase nevermult.dta
erase for_smoothing_temp.dta
erase MN_presmoothing.dta

forvalues i = 1993(1)2011 {
	* The stray word -use- after -erase- made this command malformed and
	* stopped the do-file before any yearly intermediate file was removed.
	* MN_firms_`i'.dta is the copy left uncompressed by Step 3.
	erase MN_firms_`i'.dta
	erase mult_corrected_`i'.dta
}

